package resolver

import types.Hit

/**
 * User: wohlgemuth
 * Date: Feb 8, 2010
 * Time: 11:10:04 AM
 *
 */
abstract class SplitResolver extends AbstractResolver implements Resolveable, BatchedResolver {

  //confidence level of this resolver; flagged as not needed by the original author and not read anywhere in this class
  double confidenceLevel = 0

  //list which contains words which are mostly found in combination with the word in front of them, like "<name> acid"
  //should be loaded from a file
  static def nextCombinations = ['acid', 'acids', 'agar', 'acetate', 'chloride', 'alcohol', 'medium', 'broth']

  //words which are combined with the word following them, like "vitamin <name>"
  static def previousCombinations = ['vitamin']

  //if we support batching then we want to use this set, it collects the split queries until the batch is resolved
  Set<String> batchSet = new HashSet<String>()

  //do we enable batch execution by default if the batch set is empty
  boolean batchEnabledByDefault = false

  /**
   * Resolves all the compounds found in the given input string.
   *
   * If no batch is pending and batching is enabled by default, the input is
   * registered as a batch and resolved through the batch path. Otherwise the
   * input is split into queries and every query is worked on individually.
   *
   * @param input the text which should be searched for compounds
   * @return the hits found for the input
   */
  def Set<Hit> resolve(String input) {
    if (!(batchSet.isEmpty() && batchEnabledByDefault)) {
      logger.debug "execute in normal mode without batching..."

      Set<Hit> hits = new HashSet<Hit>()

      //the split already removed duplicates, so every query is searched only once
      for (String query in splitInput(input)) {
        hits.addAll(workSingleEntry(query))
      }

      return hits
    }

    logger.debug "batch set is empty and batches are allowed, so we execute in batch mode instead..."

    addBatch input
    return resolve()
  }

  /**
   * Works on a single entry: a cached value is served from the cache,
   * anything else is handed to discoverCompound.
   *
   * @param v the query to look up
   * @return the hits for this query
   */
  Set<Hit> workSingleEntry(String v) {

    Set<Hit> hits = new HashSet<Hit>()

    //not in the cache, so we have to run the actual discovery
    if (!isCached(v)) {
      for (Hit discovered in discoverCompound(v)) {
        hits.add(discovered)
      }

      return hits
    }

    //the cache provides a single entry for the value
    hits.add(getCached(v))

    return hits
  }

  /**
   * Splits the input into the set of queries to search for.
   *
   * The input is split into lines and every line into words separated by
   * blanks. Each word is trimmed and cleaned with nameCleanup, words which
   * are empty afterwards are skipped. Words are combined as follows:
   * <ul>
   * <li>a word accepted by checkForNextCombinations is appended to the
   * previous word of the line and replaces it as a query ("citric acid")</li>
   * <li>a word accepted by checkForPreviousCombinations is not a query on its
   * own, it is joined with the word following it ("vitamin c")</li>
   * <li>any other word is a query on its own</li>
   * </ul>
   * The case of the words is kept as it is, duplicates are removed by the set.
   * Combinations never span more than one line; a previous-combination word
   * at the very end of a line has no follower and is dropped.
   *
   * @param input the text to split, null is treated like an empty text
   * @return the queries after the interceptors were applied by runInterception
   */
  private Set<String> splitInput(String input) {
    Set<String> queries = new HashSet<String>()

    //a missing input has no lines, the tokenizer would fail on null
    if (input != null) {

      //splits it by new lines
      new StringTokenizer(input, "\n").each {String s ->

        logger.debug "current line: ${s}"

        //the last accepted word or combination of the current line
        def last = ""

        //true if the previous word has to be joined with the current one
        boolean next = false

        //splits by each spaces
        new StringTokenizer(s, " ").each {
          String v ->

          v = v.trim()
          v = nameCleanup(v)

          //we don't want empty strings
          if (v.size() > 0) {
            if (checkForNextCombinations(v)) {
              //the previous word is a part of this combination and no hit on its own
              queries.remove(last)

              //rebuild to the correct hit, without a leading blank if the line starts with this word
              last = last.size() > 0 ? last + " " + v : v

              logger.debug("next: ${last} ")

              //add hit
              queries.add(last)
            }
            else if (checkForPreviousCombinations(v)) {
              logger.debug("last: ${last}")

              //not a hit yet, it is completed by the following word
              last = v
              next = true
            }
            else if (next) {
              next = false
              last = last + " " + v

              logger.debug("next: ${last}")

              queries.add(last)
            }

            else {
              last = v
              queries.add(last)
            }
          }
        }
      }
    }

    //run the interceptors to remove unwanted terms
    queries = this.runInterception(queries)

    return queries
  }

  /**
   * Checks if the word is usually combined with the word in front of it,
   * needed since some compounds are combinations of words.
   *
   * The comparison ignores the case of the word. The lower casing uses
   * Locale.ROOT so that the result does not depend on the default locale of
   * the machine (a Turkish locale would turn "ACID" into "acıd").
   *
   * @param v the word to check
   * @return true if the word is contained in nextCombinations
   */
  protected boolean checkForNextCombinations(String v) {
    return nextCombinations.contains(v.toLowerCase(Locale.ROOT))
  }

  /**
   * Checks if the word is usually combined with the word following it.
   *
   * The comparison ignores the case of the word. The lower casing uses
   * Locale.ROOT so that the result does not depend on the default locale of
   * the machine (a Turkish locale would turn "VITAMIN" into "vıtamın").
   *
   * @param v the word to check
   * @return true if the word is contained in previousCombinations
   */
  protected boolean checkForPreviousCombinations(String v) {
    return previousCombinations.contains(v.toLowerCase(Locale.ROOT))
  }

  /**
   * Removes unwanted characters from the end of the value.
   *
   * Every character returned by getDirtyChars is stripped from the end of
   * the value until the value ends with a clean character or is empty.
   * Characters at the beginning or inside of the value are not touched.
   *
   * @param value the value to clean
   * @return the value without trailing dirty characters, an empty string if
   *         the value contained nothing but dirty characters
   */
  protected String nameCleanup(String value) {
    logger.debug "before cleanup: ${value}"

    String result = value

    //a single pass depends on the order of the dirty chars ("acid.)" would keep its dot),
    //so we repeat the pass until nothing is stripped anymore
    boolean stripped = true

    while (stripped && result.length() > 0) {
      stripped = false

      for (int c in getDirtyChars()) {
        int end = result.length() - 1

        //nothing left to strip, substring(0, -1) would fail on the empty string
        if (end < 0) {
          break
        }

        if (result.lastIndexOf(c, end) == end) {
          result = result.substring(0, end)
          stripped = true
        }
      }
    }

    logger.debug "after cleanup: ${result}"

    return result
  }

  /**
   * Provides the characters which are considered to be dirt around a name,
   * nameCleanup uses them to clean the end of a value.
   *
   * @return the dirty characters, each one as a string of a single character
   */
  protected List getDirtyChars() {
    List punctuation = ['?', '.', '!']
    List brackets = [')', '(', '[', ']']
    List separatorsAndQuotes = [',', '\"', ' ', '\'', ';', ':']

    //joined in exactly this order, the cleanup walks over the list from the first to the last entry
    return punctuation + brackets + separatorsAndQuotes
  }

  /**
   * Resolves everything which was registered in the batch set so far and
   * empties the batch set afterwards.
   *
   * @return the hits found for the registered batches
   */
  final synchronized def Set<Hit> resolve() {
    logger.debug "executing batch resolve..."

    //the batch set is only cleared after the work on it is done
    Set<Hit> hits = workOnBatch(batchSet)
    batchSet.clear()

    return hits
  }

  /**
   * Default implementation to work on a batch set, every value of the batch
   * is worked on as a single entry and all the hits are collected.
   *
   * @param batch the values to resolve
   * @return the hits of all the values in the batch
   */
  Set<Hit> workOnBatch(Set<String> batch) {

    Set<Hit> hits = new HashSet<Hit>()

    //one lookup for each registered value
    for (String entry in batch) {
      hits.addAll(workSingleEntry(entry))
    }

    return hits
  }

  /**
   * Discovers a set of compounds for the given value, to be provided by the
   * concrete resolver. It is called by workSingleEntry for every value which
   * is not cached.
   *
   * @param value the query to discover compounds for
   * @return the hits discovered for the value
   */
  public abstract Set<Hit> discoverCompound(String value)

  /**
   * Registers a text for the next batch resolve, the text is split into its
   * queries first and these queries are added to the batch set.
   *
   * @param toResolve the text to register
   */
  final void addBatch(String toResolve) {
    Set<String> splitQueries = splitInput(toResolve)

    //the batch set drops the queries which are already registered
    this.batchSet.addAll(splitQueries)
  }

}

